package au.com.acpfg.misc.biojava;
import org.knime.core.data.*;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.JoinedRow;
import org.knime.core.data.def.StringCell;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeLogger;
import org.biojava.bio.symbol.*;
import org.biojava.bio.seq.*;
/**
 * Converts nucleotide sequences read from the first input table into another
 * representation: DNA to RNA, RNA to protein, or DNA to protein. The converted
 * sequence is emitted in a single string column named "Converted Sequence",
 * joined onto the corresponding input row.
 *
 * <p>Before conversion every sequence is trimmed to a whole number of codons
 * (a multiple of three symbols); trailing symbols are dropped. Sequences with
 * fewer than three symbols therefore convert to the empty string.
 */
public class SequenceTranslationProcessor implements BioJavaProcessorInterface {
    /** Number of symbols per codon; sequences are trimmed to a multiple of this. */
    private static final int CODON_LENGTH = 3;

    private boolean m_convert_dna2prot;
    private boolean m_convert_rna2prot;
    private boolean m_convert_dna2rna;

    /**
     * Selects the conversion from the user-visible task label.
     *
     * @param m    the owning node model (not used by this constructor)
     * @param task task label; matched case-insensitively on its suffix. Any label
     *             that ends in neither "dna to protein sequence" nor
     *             "rna to protein sequence" selects DNA to RNA conversion.
     */
    public SequenceTranslationProcessor(BioJavaProcessorNodeModel m, String task) {
        m_convert_dna2prot = false;
        m_convert_rna2prot = false;
        m_convert_dna2rna = false;
        // Locale.ROOT keeps the match independent of the platform's default locale
        // (e.g. Turkish dotless-i lowercasing would otherwise break the comparison).
        task = task.toLowerCase(java.util.Locale.ROOT).trim();
        if (task.endsWith("dna to protein sequence")) {
            m_convert_dna2prot = true;
        } else if (task.endsWith("rna to protein sequence")) {
            m_convert_rna2prot = true;
        } else {
            m_convert_dna2rna = true;
        }
    }

    /**
     * Converts every non-empty sequence of {@code inData[0]} and appends one
     * output row per converted sequence to {@code c}. Rows whose sequence is
     * {@code null} or empty are skipped and produce no output row.
     *
     * @param m      node model used to read sequences and query their type
     * @param exec   execution context used for progress reporting and cancellation
     * @param l      logger (not used by this method)
     * @param inData input tables; only the first one is read
     * @param c      container receiving the output rows
     * @throws InvalidSettingsException if the input holds protein sequences, or the
     *                                  sequence type does not match the configured task
     * @throws Exception                if cancellation is requested or a sequence
     *                                  cannot be converted
     */
    public void execute(BioJavaProcessorNodeModel m, ExecutionContext exec,
            NodeLogger l, BufferedDataTable[] inData, BufferedDataContainer c)
            throws Exception {
        if (m.areSequencesProtein()) {
            throw new InvalidSettingsException("Protein Sequences cannot (currently) be converted!");
        }
        boolean is_rna = m.areSequencesRNA();
        boolean is_dna = m.areSequencesDNA();

        // Defensive only: the constructor always selects exactly one conversion.
        if (!m_convert_dna2prot && !m_convert_rna2prot && !m_convert_dna2rna) {
            throw new InvalidSettingsException("Implementation error -- please contact the author of this node!");
        }
        if ((is_rna && !m_convert_rna2prot)
                || (is_dna && !(m_convert_dna2prot || m_convert_dna2rna))) {
            throw new InvalidSettingsException("Sequence type does not match requested task -- reconfigure this node!");
        }

        RowIterator it = inData[0].iterator();
        int n_rows = inData[0].getRowCount();
        int seen = 0;
        while (it.hasNext()) {
            DataRow r = it.next();
            String seq = m.getSequence(r);
            // skip missing sequences -- TODO: should we put into output table?
            if (seq != null && seq.length() >= 1) {
                DataCell[] cells = new DataCell[1];
                cells[0] = new StringCell(convert(m, seq));
                c.addRowToTable(new JoinedRow(r, new DefaultRow(r.getKey(), cells)));
            }

            // Count skipped rows as well, so that progress tracks the position in the
            // input table and cancellation is still honoured across runs of empty rows.
            seen++;
            if (seen % 100 == 0) {
                exec.checkCanceled();
                exec.setProgress(((double) seen) / n_rows, "Processed row " + r.getKey());
            }
        }
    }

    /**
     * Applies the configured conversion to a single non-empty sequence.
     *
     * @param m   node model used to parse the sequence into symbols
     * @param seq the sequence text to convert
     * @return the converted sequence, or the empty string when the input holds
     *         less than one complete codon
     * @throws Exception if the sequence cannot be parsed or converted
     */
    private String convert(BioJavaProcessorNodeModel m, String seq) throws Exception {
        if (m_convert_dna2rna) {
            // NOTE(review): trimming to whole codons also drops trailing bases from a
            // plain DNA->RNA conversion; kept because existing output depends on it.
            SymbolList dna = trimToWholeCodons(m.getSequenceAsSymbol(seq));
            return (dna == null) ? "" : DNATools.toRNA(dna).seqString();
        } else if (m_convert_rna2prot) {
            SymbolList rna = trimToWholeCodons(m.getSequenceAsSymbol(seq));
            return (rna == null) ? "" : RNATools.translate(rna).seqString();
        } else if (m_convert_dna2prot) {
            SymbolList rna = trimToWholeCodons(DNATools.toRNA(DNATools.createDNA(seq)));
            return (rna == null) ? "" : RNATools.translate(rna).seqString();
        }
        throw new InvalidSettingsException("Unknown conversion -- implementation bug!");
    }

    /**
     * Drops trailing symbols so that the length is a multiple of {@link #CODON_LENGTH}.
     *
     * @param syms the symbols to trim
     * @return {@code syms} itself when already codon-aligned, a leading sub-list
     *         otherwise, or {@code null} when fewer than {@link #CODON_LENGTH}
     *         symbols would remain
     */
    private static SymbolList trimToWholeCodons(SymbolList syms) {
        int length = syms.length();
        int usable = length - (length % CODON_LENGTH);
        if (usable < CODON_LENGTH) {
            // subList(1, 0) is not a valid range in BioJava's 1-based inclusive
            // coordinates, so report "nothing left" instead of calling it.
            return null;
        }
        return (usable == length) ? syms : syms.subList(1, usable);
    }

    /**
     * Describes the column this processor produces.
     *
     * @return a table spec holding the single string column "Converted Sequence"
     */
    public DataTableSpec get_table_spec() {
        DataColumnSpec[] allColSpecs = new DataColumnSpec[1];
        allColSpecs[0] =
            new DataColumnSpecCreator("Converted Sequence", StringCell.TYPE).createSpec();
        return new DataTableSpec(allColSpecs);
    }

    /**
     * Always returns {@code true}.
     * NOTE(review): presumably this tells the node that output rows already contain
     * the input columns -- confirm against BioJavaProcessorInterface.
     */
    @Override
    public boolean isMerged() {
        return true;
    }
}